﻿using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace StarHelper
{
    /// <summary>
    /// Regular-expression helpers for extracting text, tags and image links from HTML fragments.
    /// </summary>
    public static class RegexHelper
    {
        /// <summary>
        /// Pattern matching any HTML tag. A closing tag such as <c>&lt;/p&gt;</c> is already
        /// covered because '/' is part of <c>[^&gt;]</c>, so no separate alternative is needed.
        /// </summary>
        private const string HtmlTagPattern = @"<[^>]+>";

        /// <summary>
        /// Pattern matching an img tag; the named group <c>imgUrl</c> captures the src value.
        /// </summary>
        private const string ImgSrcPattern = @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>";

        // The patterns above never change, so the Regex objects are built once and reused
        // instead of being re-parsed on every call.
        private static readonly Regex HtmlTagRegex = new Regex(HtmlTagPattern);
        private static readonly Regex ImgSrcRegex = new Regex(ImgSrcPattern, RegexOptions.IgnoreCase);

        /// <summary>
        /// Removes every HTML tag from the given text.
        /// </summary>
        /// <param name="content">Original HTML.</param>
        /// <returns>The text with all tags removed; an empty string when <paramref name="content"/> is null.</returns>
        public static string RemoveAllHtml(string content)
        {
            if (content == null)
            {
                return string.Empty;
            }

            return HtmlTagRegex.Replace(content, "");
        }

        /// <summary>
        /// Returns the first match of a regular expression in the content.
        /// </summary>
        /// <param name="regStr">Regular expression pattern.</param>
        /// <param name="content">Original HTML.</param>
        /// <param name="hashtml">True (default) to return the match as-is; false to strip HTML tags from it.</param>
        /// <returns>The first match, or an empty string when nothing matches or <paramref name="content"/> is null.</returns>
        public static string GetStrByRegex(string regStr, string content, bool hashtml = true)
        {
            if (content == null)
            {
                return string.Empty;
            }

            Match match = new Regex(regStr).Match(content);
            if (!match.Success)
            {
                return string.Empty;
            }

            return hashtml ? match.Value : RemoveAllHtml(match.Value);
        }

        /// <summary>
        /// Returns the first span of content that begins with <paramref name="start"/> and ends
        /// at the nearest following <paramref name="end"/>, both delimiters included.
        /// </summary>
        /// <param name="start">Start delimiter; inserted into the pattern unescaped, so it is interpreted as a regex fragment.</param>
        /// <param name="end">End delimiter; inserted into the pattern unescaped, so it is interpreted as a regex fragment.</param>
        /// <param name="content">Original HTML.</param>
        /// <param name="hasHtml">True to return the match as-is; false (default) to strip HTML tags from it.</param>
        /// <returns>The first match, or an empty string when nothing matches or <paramref name="content"/> is null.</returns>
        public static string GetStrByRegex(string start, string end, string content, bool hasHtml = false)
        {
            return GetStrByRegex(BuildRangePattern(start, end), content, hasHtml);
        }

        /// <summary>
        /// Returns every match of a regular expression in the content.
        /// </summary>
        /// <param name="regStr">Regular expression pattern.</param>
        /// <param name="content">Original HTML.</param>
        /// <returns>
        /// The matched strings in order of appearance, or null when there is no match, the
        /// content is null, or the pattern is null or invalid.
        /// </returns>
        public static List<string> GetStrListByRegex(string regStr, string content)
        {
            if (content == null)
            {
                return null;
            }

            try
            {
                MatchCollection matches = new Regex(regStr).Matches(content);
                if (matches.Count == 0)
                {
                    return null;
                }

                List<string> values = new List<string>(matches.Count);
                foreach (Match match in matches)
                {
                    values.Add(match.Value);
                }
                return values;
            }
            catch (System.ArgumentException)
            {
                // Null or malformed pattern: reported as "no result" rather than thrown.
                return null;
            }
            catch (RegexMatchTimeoutException)
            {
                // Only reachable when a process-wide default match timeout is configured.
                return null;
            }
        }

        /// <summary>
        /// Returns every span of content that begins with <paramref name="start"/> and ends at
        /// the nearest following <paramref name="end"/>, both delimiters included.
        /// </summary>
        /// <param name="start">Start delimiter; inserted into the pattern unescaped, so it is interpreted as a regex fragment.</param>
        /// <param name="end">End delimiter; inserted into the pattern unescaped, so it is interpreted as a regex fragment.</param>
        /// <param name="content">Original HTML.</param>
        /// <returns>
        /// The matched strings in order of appearance, or null when there is no match, the
        /// content is null, or the resulting pattern is invalid.
        /// </returns>
        public static List<string> GetStrListByRegex(string start, string end, string content)
        {
            return GetStrListByRegex(BuildRangePattern(start, end), content);
        }

        /// <summary>
        /// Extracts the src value of every img tag found in the text.
        /// </summary>
        /// <param name="content">Text containing img tags.</param>
        /// <returns>The image links in order of appearance; an empty list when there are none or <paramref name="content"/> is null.</returns>
        public static List<string> RegexImgs(this string content)
        {
            List<string> urls = new List<string>();
            if (content == null)
            {
                return urls;
            }

            // Collect the captured src value of each img tag.
            foreach (Match match in ImgSrcRegex.Matches(content))
            {
                urls.Add(match.Groups["imgUrl"].Value);
            }
            return urls;
        }

        /// <summary>
        /// Builds the case-insensitive, single-line pattern that lazily matches everything
        /// from <paramref name="start"/> up to the nearest <paramref name="end"/>.
        /// </summary>
        private static string BuildRangePattern(string start, string end)
        {
            return @"(?is)(" + start + ").*?(" + end + ")";
        }
    }
}
